options(stringsAsFactors = FALSE)
df <- read.csv("../course-497-submissions-full.csv")
rbind(
total = summary(factor(df$status)),
count_elements = summary(factor(df$status[df$step_id == 86100])),
build_ziggurat = summary(factor(df$status[df$step_id == 86098])),
avian = summary(factor(df$status[df$step_id == 86882])),
binary_op = summary(factor(df$status[df$step_id == 93343]))
)
## correct wrong
## total 29463 32983
## count_elements 340 431
## build_ziggurat 340 422
## avian 297 257
## binary_op 255 246
count_elementscount_sol <- df[df$step_id == 86100 & df$status == "correct", "reply"]
c(
length(count_sol[str_detect(count_sol, "for")]),
length(count_sol[str_detect(count_sol, "apply")]),
length(count_sol[str_detect(count_sol, "table")]),
length(count_sol)
)
## [1] 108 94 112 340
count_elements_sapply <- function(x) {
y <- sort(unique(x)) # or unique(sort(x))?
rbind(y, sapply(y, function(a) sum(a == x)))
}
count_elementscount_elements_table <- function(x) {
t <- table(x)
rbind(as.numeric(rownames(t)), t) # a bit clumsy but whatever
}
length(count_sol[!str_detect(count_sol, "table|apply|for")])
## [1] 32
count_elements_rle <- function(x) {
a <- rle(sort(x))
matrix(c(a$values, a$lengths), nrow = 2, byrow = T)
}
count_elementsset.seed(1825); x <- sample(1:100, 1e5, replace = TRUE) # times = 200
build_zigguratzig_sol <- df[df$step_id == 86098 & df$status == "correct", "reply"]
c(
length(zig_sol),
length(zig_sol[str_count(zig_sol, "for") > 1]), length(zig_sol[str_count(zig_sol, "for") == 1]),
length(zig_sol[!str_detect(zig_sol, "for|while|repeat")]),
length(zig_sol[str_count(zig_sol, "function") > 1])
)
## [1] 340 39 198 51 33
build_ziggurat_outer <- function(n) {
s <- 2 * n - 1
outer(1:s, 1:s, function(x, y) {
x <- n - abs(n - x)
y <- n - abs(n - y)
pmin(x, y)
})
}
build_zigguratbuild_ziggurat_for <- function(n) {
w <- c(1:n, (n-1):1)
l <- length(w)
mat <- matrix(nrow=l, ncol=l)
for(p in 1:l) {
for (q in 1:l) {
mat[p, q] <- min(w[p], w[q])
}
}
if (n==1) mat <- matrix(1)
mat
}
build_ziggurat_apply <- function(n) {
tmp <- matrix(c(1:((n*2-1)^2)), nrow = n*2-1, ncol = n*2-1)
return(n - apply(tmp, 1:2, function(x) max(abs(c(n,n) - which(tmp == x, arr.ind = T)))))
}
build_zigguratbuild_ziggurat_recursive1 <- function(n) {
ziggurat <- function (m, w1, w) {
a <- (w - w1)/2 + 1
b <- (w + w1)/2
m[a:b, a:b] <- m[a:b, a:b] + 1
if (w1 > 1) {
ziggurat(m, w1 - 2, w)
} else m
}
w <- 2*n - 1
ziggurat(matrix(0, ncol=w, nrow=w), w, w)
}
build_ziggurat_recursive2 <- function(n, level = 1) {
m <- matrix(level, nrow = n*2 - 1, ncol = n*2 - 1)
if (n > 1) {
m[2:(nrow(m) - 1), 2:(ncol(m) - 1)] <- build_ziggurat_recursive2(n - 1, level + 1)
}
m
}
Reduce et al. : funprog is funReduce(f, v): \(l_1 = f(v_1, v_2), l_2 = f(l_1, v_3), ..., l_{n-1} = f(l_{n-2}, v_n)\)
set.seed(1961); sample_pool <- 1:100
l <- replicate(20, sample(sample_pool, 10), simplify = FALSE)
setdiff(sample_pool, Reduce(union, l))
## [1] 18 23 44 58 61 65 67 71 72 75 80 82 94 96
build_ziggurat_reduce <- function(n) {
stage <- function(k, n) {
m <- matrix(0, 2*n - 1, 2*n - 1)
ind <- k:(2*n - k)
m[ind, ind] <- 1
m
}
Reduce(`+`, lapply(1:n, function(i) stage(i, n)))
}
build_zigguratn <- 50 # times = 40
avianavian <- read.csv("avianHabitat.csv")
avian %>%
select(Site, Observer, contains("Ht")) %>%
mutate(Site = factor(str_replace(Site, "[:digit:]+", ""))) %>%
group_by(Site, Observer) %>%
summarise_each(funs(sum(. > 0)))
## Source: local data frame [11 x 8]
## Groups: Site [?]
##
## Site Observer DBHt WHt EHt AHt HHt LHt
## (fctr) (fctr) (int) (int) (int) (int) (int) (int)
## 1 BunkerHill JT 53 36 63 5 68 15
## 2 BunkerHill RA 56 46 65 6 70 24
## 3 CreteCreek RA 49 42 78 0 91 78
## 4 CreteCreek RR 43 32 67 0 80 74
## 5 HortonCreek JT 39 38 70 0 75 65
## 6 HortonCreek RA 49 60 116 0 123 110
## 7 HortonCreek RR 21 21 45 1 49 40
## 8 LivingstonCreek RA 49 105 106 0 134 95
## 9 LivingstonCreek RR 47 115 106 0 154 98
## 10 McAdamCreek RA 58 114 122 0 139 85
## 11 McAdamCreek RR 28 38 49 0 50 30
avianavian %>%
select(Site, Observer, contains("Ht")) %>%
mutate(Site = factor(str_replace(Site, "[:digit:]+", ""))) %>%
gather(Species, Height, -Site, -Observer) %>%
group_by(Site, Observer, Species) %>%
summarise(Result = sum(Height > 0)) %>%
filter(Result > 100)
## Source: local data frame [12 x 4]
## Groups: Site, Observer [4]
##
## Site Observer Species Result
## (fctr) (fctr) (chr) (int)
## 1 HortonCreek RA EHt 116
## 2 HortonCreek RA HHt 123
## 3 HortonCreek RA LHt 110
## 4 LivingstonCreek RA EHt 106
## 5 LivingstonCreek RA HHt 134
## 6 LivingstonCreek RA WHt 105
## 7 LivingstonCreek RR EHt 106
## 8 LivingstonCreek RR HHt 154
## 9 LivingstonCreek RR WHt 115
## 10 McAdamCreek RA EHt 122
## 11 McAdamCreek RA HHt 139
## 12 McAdamCreek RA WHt 114
avianavian_base <- avian
coverage_variables <- names(avian)[grepl("Ht", names(avian))]
avian_base <- avian_base[, c("Site", "Observer", coverage_variables)]
avian_base$Site <- factor(gsub("\\d", "", avian_base$Site))
avian_base <- reshape(avian_base, direction = "long", varying = coverage_variables,
v.names = "Height", timevar = "Species", times = coverage_variables)
avian_base <- avian_base[, c("Site", "Observer", "Species", "Height")]
subset(
aggregate(avian_base[, "Height", drop = F],
list(Site = avian_base$Site, Observer = avian_base$Observer, Species = avian_base$Species),
function(x) sum(x > 0)),
Height > 100
)
## Site Observer Species Height
## 27 HortonCreek RA EHt 116
## 28 LivingstonCreek RA EHt 106
## 29 McAdamCreek RA EHt 122
## 32 LivingstonCreek RR EHt 106
## 38 HortonCreek RA HHt 123
## 39 LivingstonCreek RA HHt 134
## 40 McAdamCreek RA HHt 139
## 43 LivingstonCreek RR HHt 154
## 49 HortonCreek RA LHt 110
## 61 LivingstonCreek RA WHt 105
## 62 McAdamCreek RA WHt 114
## 65 LivingstonCreek RR WHt 115
"%+for%" <- function(x, y) {
a <- max(length(x), length(y))
v <- rep(NA, a)
for (i in 1:a) { # a == 0 ??
a[i] <- x[i] + y[i]
}
#print(a) # ??
a
}
bin_sol <- df[df$step_id == 93343 & df$status == "correct", "reply"]
length(bin_sol[!str_detect(bin_sol, "for|if|1:")])
## [1] 31
"%+len%" <- function(x, y) {
length(x) <- length(y) <- max(length(x), length(y))
x + y
}
"%+vap%" <- function(x, y) {
ind <- max(length(x), length(y))
vapply(seq_len(ind), function(i) x[i] + y[i], numeric(1)) # seq.int
}
autoplot(microbenchmark(1:1e2 + 1:1e4, 1:1e2 %+for% 1:1e4, 1:1e2 %+len% 1:1e4, 1:1e2 %+vap% 1:1e4, times = 1000))